/**
 * parser module
 * 
 * @author strlst <e11907086@student.tuwien.ac.at>
 * @date 2020-01-02
 * @brief contains method implementations for definitions that can be found
 * in the parser header, useful for both the server and client when dealing
 * with input that needs to be parsed
 */

#include "parser.h"

/**
 * @brief joins the document root and a requested path into one newly
 * allocated string, appending the index file name when the requested path
 * ends in '/'
 * @details a single trailing '/' on `doc_root` is removed in place, so
 * `doc_root` has to point to writable memory; `req_file` is always freed
 * before returning, whether or not the allocation succeeded
 * @param doc_root document root, trimmed in place if it ends in '/'
 * @param index file name appended to requests that end in '/'
 * @param req_file heap-allocated requested path, freed by this function
 * @return newly allocated combined path the caller has to free, or NULL if
 * the allocation failed
 */
char *translate_requested_file(char *doc_root, char *index, char *req_file) {
    /* trim a trailing '/' so the join does not produce a double slash; */
    /* the length guard keeps an empty doc root from being indexed at -1 */
    size_t doc_root_length = strlen(doc_root);
    if (doc_root_length > 0 && doc_root[doc_root_length - 1] == '/') {
        doc_root[doc_root_length - 1] = '\0';
        --doc_root_length;
    }

    size_t req_file_length = strlen(req_file);

    /* special case, a request ending in '/' gets the index appended */
    int is_index_file = req_file_length > 0
        && req_file[req_file_length - 1] == '/';
    size_t index_length = is_index_file ? strlen(index) : 0;

    /* calculate length of new file string */
    size_t combined_length = doc_root_length + req_file_length + index_length;

    /* combine strings, all lengths are known so plain memcpy suffices */
    char *combined = malloc(combined_length + 1);
    if (combined != NULL) {
        memcpy(combined, doc_root, doc_root_length);
        memcpy(combined + doc_root_length, req_file, req_file_length);
        if (is_index_file)
            memcpy(combined + doc_root_length + req_file_length, index, index_length);
        combined[combined_length] = '\0';
    }

    /* ownership of the old string was handed over, free it on every path */
    free(req_file);

    return combined;
}

/**
 * @brief a wrapped component of the `parse_response_code(...)` method, useful
 * to avoid unreadable code and excessive code duplication, does the same but
 * with the buf parameters collapsed to a string containing a single line
 * @details the line is tokenised in place with `strsep`, so it has to be
 * writable; the status code has to be a decimal number that fits a uint16_t,
 * optionally followed by a single carriage return (which is what remains
 * when the start-line carries no reason phrase)
 * @param program program name to print in the event of errors
 * @param line string containing the start-line of an http response
 * @return response codes as specified in the documentation of
 * `parse_response_code(...)`; RC_PROTOCOL_ERROR if the protocol token is not
 * "HTTP/1.1" or the status code is missing or malformed
 */
static uint16_t parse_response_code_wrapped(char *program, char *line) {
    /* parse first block, it has to name the protocol */
    char *line_part = strsep(&line, " ");
    if (line_part == NULL || strcmp(line_part, "HTTP/1.1") != 0)
        return RC_PROTOCOL_ERROR;

    /* parse second block, the status code */
    line_part = strsep(&line, " ");
    if (line_part == NULL)
        return RC_PROTOCOL_ERROR;

    /* strtoul would silently accept leading whitespace and a sign */
    if (line_part[0] < '0' || line_part[0] > '9')
        return RC_PROTOCOL_ERROR;

    /* as per documentation, reset errno for strtoul */
    errno = 0;
    char *end = NULL;
    unsigned long parsed = strtoul(line_part, &end, 10);
    /* validate before narrowing, otherwise e.g. 65736 would wrap to 200 */
    if (end == line_part || errno != 0 || parsed > UINT16_MAX)
        return RC_PROTOCOL_ERROR;
    /* nothing but the line's trailing carriage return may follow the digits */
    if (*end != '\0' && strcmp(end, "\r") != 0)
        return RC_PROTOCOL_ERROR;

    uint16_t response_code = (uint16_t) parsed;

    /* disallow every response code except for RC_OK */
    if (response_code != RC_OK) {
        /* line is NULL when nothing follows the status code */
        fprintf(stderr, "%s: response is not '200 OK': %u %s\n", program,
            (unsigned int) response_code, line != NULL ? line : "");
        /* NOTE(review): die is declared elsewhere and is expected not to return */
        die("", NULL, EXIT_INVALID_RC);
    }

    return response_code;
}

/**
 * @brief parses the response code out of the first line of an http response
 * held in `buf`
 * @details only the first line (everything before the first line feed) is
 * copied and inspected, `buf` itself is left untouched; `buf` has to be
 * NUL-terminated because the line feed is searched with `strchr`
 * @param program program name to print in the event of errors
 * @param buf NUL-terminated buffer holding the response
 * @param buf_len length of buf, the first line must not exceed it
 * @return the parsed response code; on a missing line feed, a failed
 * allocation or a protocol error `die(...)` is called instead
 */
uint16_t parse_response_code(char *program, char *buf, int buf_len) {
    /* the start-line ends at the first line feed */
    char *line_end = strchr(buf, '\n');

    /* no absurdities allowed, this includes a buffer without any line feed */
    /* NOTE(review): die is declared elsewhere and is expected not to return */
    if (line_end == NULL || line_end - buf > buf_len)
        die("%s: error parsing the response code buffer\n", program, EXIT_FAILURE);

    /* instead of strdup'ing the entire buffer, copy just the line */
    size_t line_length = (size_t) (line_end - buf);
    char *line = malloc(line_length + 1);
    if (line == NULL)
        die("%s: failed allocating line\n", program, EXIT_FAILURE);
    memcpy(line, buf, line_length);
    /* terminate explicitly, malloc'ed memory is not zeroed */
    line[line_length] = '\0';

    /* the helper tokenises the copy in place, free it afterwards */
    uint16_t response_code = parse_response_code_wrapped(program, line);
    free(line);

    if (response_code == RC_PROTOCOL_ERROR)
        die("%s: Protocol error!\n", program, EXIT_PROTOCOL_ERROR);

    return response_code;
}

/**
 * @brief a wrapped component of the `parse_request_file(...)` method, useful
 * to avoid unreadable code and excessive code duplication, does the same but
 * with the buf parameters collapsed to a string containing a single line
 * @details the line is tokenised in place with `strsep`, so it has to be
 * writable; the requested path is only copied once the whole start-line has
 * been validated, so no memory is held on any error path
 * @param program program name to print in the event of errors
 * @param line string containing the start-line of an http request
 * @param ret pointer to uint16_t value used to communicate information about
 * the outcome of this method: RQ_OK on success, RQ_NOT_IMPLEMENTED for any
 * method other than GET, RQ_PROTOCOL_ERROR for a malformed start-line or a
 * path with a ".." directory component
 * @return requested file as specified in the documentation of
 * `parse_request_file(...)`, heap-allocated and owned by the caller, or NULL
 * whenever *ret is not RQ_OK
 */
static char *parse_request_file_wrapped(char *program, char *line, uint16_t *ret) {
    *ret = RQ_OK;

    /* there has to be text in the header */
    char *method = strsep(&line, " ");
    if (method == NULL) {
        *ret = RQ_PROTOCOL_ERROR;
        return NULL;
    }

    /* only GET supported */
    if (strcmp(method, "GET") != 0) {
        *ret = RQ_NOT_IMPLEMENTED;
        return NULL;
    }

    /* there has to be a second word in the header */
    char *path = strsep(&line, " ");
    if (path == NULL) {
        *ret = RQ_PROTOCOL_ERROR;
        return NULL;
    }

    /* disallow relative path funkiness, both in the middle of the path */
    /* ("/../") and as its final component ("/..") */
    size_t path_length = strlen(path);
    int ends_in_parent = path_length >= 3
        && strcmp(path + path_length - 3, "/..") == 0;
    if (ends_in_parent || strstr(path, "/../") != NULL) {
        *ret = RQ_PROTOCOL_ERROR;
        return NULL;
    }

    /* there has to be a third word in the header, naming the protocol; */
    /* the carriage return is still attached as the line was cut at '\n' */
    char *version = strsep(&line, " ");
    if (version == NULL || strcmp(version, "HTTP/1.1\r") != 0) {
        *ret = RQ_PROTOCOL_ERROR;
        return NULL;
    }

    /* everything checked out, only now make the copy handed to the caller */
    char *file = strdup(path);
    /* NOTE(review): die is declared elsewhere and is expected not to return */
    if (file == NULL)
        die("%s: failed allocating file\n", program, EXIT_FAILURE);

    return file;
}

char *parse_request_file(char *program, char *buf, int buf_len, uint16_t *ret) {
    /* setup */
    long int line_length = strchr(buf, '\n') - buf;

    /* still no absurdities allowed */
    if (line_length > buf_len)
        die("%s: error parsing the response code buffer\n", program, EXIT_FAILURE);

    /* instead of strdup'ing the entire buffer, copy just the line */
    char *line = malloc(line_length + 1);
    memset(line, 0, line_length + 1);
    if (line == NULL)
        die("%s: failed allocating line\n", program, EXIT_FAILURE);
    strncpy(line, buf, line_length);

    /* save original pointer and free afterwards */
    char *line_orig = line;
    char *file = parse_request_file_wrapped(program, line, ret);
    free(line_orig);

    return file;
}

/**
 * @brief extracts the webserver (hostname) part of a url string, i.e. the
 * prefix up to the first of the delimiter characters ";/?:@=&" or the whole
 * string if none of them occurs
 * @param program program name to print in the event of errors
 * @param url_str url string to inspect, it is not modified
 * @return newly allocated, NUL-terminated copy of the webserver part that the
 * caller has to free
 */
char *parse_url_webserver(char *program, char *url_str) {
    /* these characters delimit the webserver (hostname) from the rest, */
    /* strcspn yields the length of the prefix free of all of them */
    size_t url_webserver_len = strcspn(url_str, ";/?:@=&");

    /* prepare for copy */
    char *url_webserver = malloc(url_webserver_len + 1);
    /* error checking has to come before the first write */
    /* NOTE(review): the format names two %s while only program is passed */
    /* here; whether die supplies the second one is not visible in this file */
    if (url_webserver == NULL)
        die("%s: could not allocate url_webserver: %s\n", program, EXIT_FAILURE);

    /* copy relevant part of url_str */
    memcpy(url_webserver, url_str, url_webserver_len);
    url_webserver[url_webserver_len] = '\0';

    return url_webserver;
}

/**
 * @brief extracts the path part of a url string, i.e. everything from the
 * first '/' onwards, or "/" if the string contains no '/' at all
 * @param program program name to print in the event of errors
 * @param url_str_old url string to inspect, it is not modified
 * @return newly allocated, NUL-terminated path starting with '/' that the
 * caller has to free
 */
char *parse_url_path(char *program, char *url_str_old) {
    /* the path is the suffix starting at the first '/', so it can be */
    /* located in the original string without making a scratch copy */
    const char *path = strchr(url_str_old, '/');

    /* if there is nothing after the webserver component, path is / */
    if (path == NULL)
        path = "/";

    size_t path_len = strlen(path);
    char *url_path = malloc(path_len + 1);
    /* error checking has to come before the first write */
    /* NOTE(review): the format names two %s while only program is passed */
    /* here; whether die supplies the second one is not visible in this file */
    if (url_path == NULL)
        die("%s: could not allocate url_path: %s\n", program, EXIT_FAILURE);

    /* copy including the terminating NUL */
    memcpy(url_path, path, path_len + 1);

    return url_path;
}

/**
 * @brief extracts the file part of a url string, i.e. whatever follows the
 * last '/', or "index.html" if the string ends in '/' or contains no '/'
 * @details when the string does not end in '/', an empty directory component
 * (two consecutive '/') is rejected through `die(...)`
 * @param program program name to print in the event of errors
 * @param url_str_old url string to inspect, it is not modified
 * @return newly allocated, NUL-terminated file name that the caller has to
 * free
 */
char *parse_url_file(char *program, char *url_str_old) {
    size_t url_len = strlen(url_str_old);
    /* the first '/' ends the webserver component */
    const char *first_slash = strchr(url_str_old, '/');

    /* a url without any '/' is treated like one ending in '/'; checking */
    /* first_slash first also guarantees url_len > 0 before indexing */
    int trailing_slash = first_slash == NULL
        || url_str_old[url_len - 1] == '/';

    /* if / trails, we implicitly assume the file to be 'index.html' */
    const char *file = "index.html";
    if (!trailing_slash) {
        /* we need to discriminate the cases: */
        /* http://a//b, http://a/b and http://a/b/ */
        /* NOTE(review): die is declared elsewhere and is expected not to return */
        if (strstr(first_slash, "//") != NULL)
            die("%s: URL cannot contain empty directories\n", program, EXIT_FAILURE);
        /* otherwise the file is whatever follows the last '/' */
        file = strrchr(first_slash, '/') + 1;
    }

    size_t file_len = strlen(file);
    char *url_file = malloc(file_len + 1);
    /* error checking has to come before the first write */
    if (url_file == NULL)
        die("%s: could not allocate url_file: %s\n", program, EXIT_FAILURE);

    /* copy including the terminating NUL */
    memcpy(url_file, file, file_len + 1);

    return url_file;
}

/**
 * @brief splits a url of the form 'http://<webserver>[<rest>]' into its
 * webserver, path and file parts
 * @details the url has to start with 'http://' and has to be longer than
 * that prefix, otherwise `die(...)` is called; `req_url_str` is not modified
 * @param program program name to print in the event of errors
 * @param req_url_str url string to parse
 * @return url structure whose webserver, path and file members are the
 * strings returned by `parse_url_webserver(...)`, `parse_url_path(...)` and
 * `parse_url_file(...)`; the caller has to free each of them
 */
url parse(char *program, char *req_url_str) {
    static const char scheme[] = "http://";
    const size_t scheme_len = sizeof scheme - 1;

    /* the scheme is stripped from the front below, so it has to be a */
    /* prefix, finding it somewhere inside the string is not enough */
    /* NOTE(review): die is declared elsewhere and is expected not to return */
    if (strncmp(req_url_str, scheme, scheme_len) != 0)
        die("%s: URL must start with 'http://'\n", program, EXIT_FAILURE);

    /* a url has to at least be longer than 'http://' */
    if (strlen(req_url_str) <= scheme_len)
        die("%s: URL must be longer than just 'http://'\n", program, EXIT_FAILURE);

    /* the helpers only read their input, so the remainder can be handed */
    /* over in place instead of being copied first */
    char *url_str = req_url_str + scheme_len;

    /* create url structure from the individual parts */
    url request_url;
    request_url.webserver = parse_url_webserver(program, url_str);
    request_url.path = parse_url_path(program, url_str);
    request_url.file = parse_url_file(program, url_str);

    return request_url;
}